{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle as pkl\n",
    "import pandas as pd\n",
    "import os\n",
    "import os.path as osp\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('../aic20_attributes/que_scenetext.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "scenetext_dict = {}\n",
    "for i, img_name in enumerate(df[\"image_name\"]):\n",
    "    if img_name not in scenetext_dict:\n",
    "        scenetext_dict[img_name] = []\n",
    "    scenetext_dict[img_name].append(\n",
    "        {df['single_word'][i]: df['score'][i]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "../aic20_attributes/gal_wheel.csv\n",
      "../aic20_attributes/que_wheel.csv\n",
      "../aic20_attributes/gal_viewpoint.csv\n",
      "../aic20_attributes/que_viewpoint.csv\n",
      "../aic20_attributes/gal_type6.csv\n",
      "../aic20_attributes/que_type6.csv\n",
      "../aic20_attributes/gal_type8.csv\n",
      "../aic20_attributes/que_type8.csv\n",
      "../aic20_attributes/que_topview.csv\n",
      "../aic20_attributes/gal_topview.csv\n"
     ]
    }
   ],
   "source": [
    "attrb_files = [\n",
    "    (\"../aic20_attributes/gal_wheel.csv\", \"gal_wheel\"),\n",
    "    (\"../aic20_attributes/que_wheel.csv\", \"que_wheel\"),\n",
    "    (\"../aic20_attributes/gal_viewpoint.csv\", \"gal_view\"),\n",
    "    (\"../aic20_attributes/que_viewpoint.csv\", \"que_view\"),\n",
    "    (\"../aic20_attributes/gal_type6.csv\", \"gal_type6\"),\n",
    "    (\"../aic20_attributes/que_type6.csv\", \"que_type6\"),\n",
    "    (\"../aic20_attributes/gal_type8.csv\", \"gal_type8\"),\n",
    "    (\"../aic20_attributes/que_type8.csv\", \"que_type8\"),\n",
    "#     (\"../aic20_attributes/que_rearlight.csv\", \"que_rearlight\"),\n",
    "#     (\"../aic20_attributes/gal_rearlight.csv\", \"gal_rearlight\"),\n",
    "    (\"../aic20_attributes/que_topview.csv\", \"que_topview\"),\n",
    "    (\"../aic20_attributes/gal_topview.csv\", \"gal_topview\"),\n",
    "]\n",
    "\n",
    "def build_dict(csv_file, dict_name):\n",
    "    df = pd.read_csv(csv_file)\n",
    "    img2attri = {}\n",
    "    print(csv_file)\n",
    "    for i, fname in enumerate(df[\"image_name\"]):\n",
    "        fname = int(fname.split('.')[0])\n",
    "        lbl = df['label'][i]\n",
    "        scr = df['score'][i]\n",
    "        img2attri[fname]= {'lbl':lbl, 'scr': scr}\n",
    "    return (dict_name, img2attri)\n",
    "\n",
    "aic20_attrbs_dict = {}\n",
    "for i in range(len(attrb_files)):\n",
    "    dict_key, dict_val = build_dict(*attrb_files[i])\n",
    "    aic20_attrbs_dict[dict_key] = dict_val\n",
    "    \n",
    "pkl.dump(aic20_attrbs_dict, open(\n",
    "    \"../aic20_attributes/aic20_attribs.pkl\", \"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "count = 0\n",
    "quelen = 1052\n",
    "gallen = 18920\n",
    "que_img_attrib = {i:{}for i in range(1,quelen+1)}\n",
    "gal_img_attrib = {i:{}for i in range(1,gallen+1)}\n",
    "for dict_name in aic20_attrbs_dict:\n",
    "    if \"que\" in dict_name:\n",
    "        for i in range(1,quelen+1):\n",
    "            if (i in aic20_attrbs_dict[dict_name]):\n",
    "                que_img_attrib[i][dict_name]=aic20_attrbs_dict[dict_name][i]\n",
    "            else:\n",
    "                que_img_attrib[i][dict_name] = {'lbl':-1, 'scr':0.0}\n",
    "    else:\n",
    "        for i in range(1,gallen+1):\n",
    "            if (i in aic20_attrbs_dict[dict_name]):\n",
    "                gal_img_attrib[i][dict_name]=aic20_attrbs_dict[dict_name][i]\n",
    "            else:\n",
    "                gal_img_attrib[i][dict_name] = {'lbl':-1, 'scr':0.0}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "pkl.dump(que_img_attrib, open(\n",
    "    \"../aic20_attributes/aic20_que_imgs_attribs.pkl\", \"wb\"))\n",
    "pkl.dump(gal_img_attrib, open(\n",
    "    \"../aic20_attributes/aic20_gal_imgs_attribs.pkl\", \"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "que_img_attrib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'4954444', 'wrestoration', '563', '2659', 'worrywhe', 'apless', '563543', '4887', '400', 'uctric', 'intermodal', 'homecare', '268'}\n",
      "13\n",
      "{242: {'intermodal': [283, 283]}, 328: {'wrestoration': [57, 57, 356]}, 350: {'worrywhe': [217], '400': [185, 683, 217, 508, 217, 771, 758, 185, 185, 683, 217, 508, 217, 771, 758, 185]}, 352: {'wrestoration': [57, 57, 356], '563543': [57, 57], '4887': [57]}, 399: {'homecare': [260, 96]}, 444: {'uctric': [484]}, 647: {'268': [565, 536, 565, 553, 175, 536, 565]}, 827: {'apless': [161]}, 901: {'4954444': [332, 332], '563': [185, 683, 508, 771, 57, 57, 758, 185]}, 916: {'2659': [166, 553]}}\n"
     ]
    }
   ],
   "source": [
    "#Scence Text\n",
    "text_match = {}\n",
    "selected_kws = []\n",
    "with open(\"../aic20_attributes/scencetext/output_dist3.txt\") as fi:\n",
    "    lines = fi.readlines()\n",
    "    for line in lines:\n",
    "        info = line.strip().split(' ')\n",
    "        img_id = int(info[0].split('.')[0])\n",
    "        que_kw = info[1]\n",
    "        if len(que_kw) == 1 or que_kw in ['stop', 'only', 'green', 'cab', 'the']:\n",
    "            continue\n",
    "        if len(que_kw) <= 5  and not que_kw.isdigit():\n",
    "            continue\n",
    "        selected_kws.append(que_kw)\n",
    "        track_id = int(info[2]) - 1\n",
    "        if img_id not in text_match:\n",
    "            text_match[img_id] = {}\n",
    "        if que_kw not in text_match[img_id]:\n",
    "            text_match[img_id][que_kw] = []\n",
    "        text_match[img_id][que_kw].append(track_id)\n",
    "\n",
    "print(set(selected_kws))\n",
    "print(len(set(selected_kws)))\n",
    "print(text_match)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'268': [565, 536, 565, 553, 175, 536, 565]}"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text_match[647]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "pkl.dump(text_match, open(\n",
    "    \"../aic20_attributes/aic20_scencetext_attribs.pkl\", \"wb\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
